
%!TEX program = xelatex
%!TEX TS-program = xelatex
%!TEX encoding = UTF-8 Unicode

\documentclass[10pt]{article} 

\input{wang_preamble.tex}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% titling supplies the \droptitle length used below to reposition the \maketitle block.
\usepackage{titling}
\setlength{\droptitle}{-2cm}   % This is your set screw: a negative value moves the title block up; adjust to taste

%% Document title, author, and date
% Author line: two blank underlined fields (4cm wide each) for the student ID (学号)
% and the name (姓名), separated by 1cm of horizontal space.
\author{学号 \underline{\hspace{4cm}} \hspace{1cm} 姓名 \underline{\hspace{4cm}} }
\title{多元统计分析练习5.3-5.5}
%\date{\vspace{-3ex}}
% Redefine \today as "<year> 年 <month> 月 <day> 日", built from the \year, \month and \day counters.
% NOTE(review): \date below is given a fixed string, so this \today is not used by \date in this file;
% it may still be used inside wang_preamble.tex -- confirm before removing.
\renewcommand{\today}{\number\year \,年 \number\month \,月 \number\day \,日}
\date{2024 年 5 月 21 日}
%\date{March 9, 2021}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{document}

\maketitle

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{enumerate}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 01
设有 $k$ 个组 $\pi_1,\pi_2,\cdots,\pi_k$, 设组 $\pi_i$ 的密度函数为 $f_i(x)$, 设样品 $x$ 来自组 $\pi_i$ 的先验概率为 $p_i$, 满足 $p_1+p_2+\cdots+p_k=1$. 写出最大后验概率法的判别规则。

\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 02
例子5.3.1. 设有三个组。设先验概率 $p_1=0.05$, $p_2=0.65$, $p_3=0.30$. 
设某样品 $x_0$ 未知分组，设概率密度值分别为 $f_1(x_0)=0.10$, $f_2(x_0)=0.63$, $f_3(x_0)=2.4$. 
计算 $x_0$ 属于各组的后验概率。

\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 03
设有 $k$ 个组 $\pi_1,\pi_2,\cdots,\pi_k$ 都是正态总体，设 $\pi_i\sim N_p(\mu_i, \Sigma_i)$.
%, \Sigma_i>0, i=1,2,\cdots,k$. 
设样品 $x$ 来自组 $\pi_i$ 的先验概率为 $p_i$. 
写出最大后验概率法的判别规则。

\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 04
例子5.3.2. 设有21家破产企业和25家非破产企业的财务数据，参见表格5.2.1. 
%其中变量 $x_1=$现金流量/总债务，$x_2=$净收入/总资产，$x_3=$流动资产/流动债务，$x_4=$流动资产/净销售额。
%I组为破产企业，II组为非破产企业。
设已知破产企业所占的比例为10\%. 
现有某未判企业 $x=(-0.16, -0.10, 1.45, 0.51)'$. 计算该未判企业属于各组的后验概率。% $P(\pi_1\mid x)$ 和 $P(\pi_2\mid x)$. 

\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 05
例子5.3.3. 设组 $\pi_1$ 和 $\pi_2$ 的概率密度分别为 $f_1(x)$ 和 $f_2(x)$, 设误判代价为 $c(1\mid 2)=12$ 个单位，
$c(2\mid 1)=4$ 个单位。设先验概率为 $p_1=0.6$, $p_2=0.4$. 写出最小期望误判代价法的判别规则。

\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 06
费希尔判别的基本思想是用向量 $x=(x_1,x_2,\cdots,x_p)'$ 的少数几个线性组合 $y_1=a_1'x$, $y_2=a_2'x$, $\cdots$, $y_r=a_r'x$ 来代替原始的 $p$ 个变量 $x_1,x_2,\cdots,x_p$, 并根据这 $r$ 个判别函数将各组分离。
举例说明这个思想。

\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 07
设来自组 $\pi_i\,(i=1,2,\cdots,k)$ 的 $p$ 维观测值为 $x_{ij}\, (j=1,2,\cdots,n_i)$.  
将它们共同投影到某一 $p$ 维常数向量 $a$ 上，得到的投影点可分别对应线性组合 $y_{ij}=a'x_{ij}$. 
\begin{enumerate}
\item  写出 $y_{ij}$ 的组间平方和 $\mathrm{SSTR}$.  
\item  写出 $y_{ij}$ 的组内平方和 $\mathrm{SSE}$.  
\item  写出费希尔第一线性判别函数。
\item  写出费希尔第二线性判别函数。
\item  写出前两个判别函数的累计贡献率。
\end{enumerate}

\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 08
例子5.4.1. 费希尔于1936年发表的鸢尾花数据参见表格5.4.1. 
三种鸢尾花各有容量为50的样本，测量数据为 $x_1={}$花萼长，$x_2={}$花萼宽，$x_3={}$花瓣长，$x_4={}$花瓣宽。
\begin{enumerate}
\item  计算费希尔第一和第二线性判别函数。
\item  计算前两个判别函数的累计贡献率。
\item  使用回代法估计误判概率。
\end{enumerate}

\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 09
设数据的变量为 $\vec{x}=(x_1,\cdots,x_r,x_{r+1},\cdots,x_p)'$. 
设 $\vec{x}_1=(x_1,\cdots,x_r)'$ 是原先用作判别的变量，
而 $\vec{x}_2=(x_{r+1},\cdots,x_p)'$ 是新引入的变量。
我们想知道在已有 $\vec{x}_1$ 用作判别的条件下，$\vec{x}_2$ 所提供的附加信息能否使得区分各组的能力有显著提高。
\begin{enumerate}
\item  将问题写成一个假设检验问题。
\item  写出检验统计量和拒绝规则。
\end{enumerate}


\vspace{0.1cm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item  %Problem 10
例子5.5.1. 对鸢尾花数据作逐步判别进行变量选择。

\vspace{0.1cm}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{enumerate}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


